package com.cntaige.collect.ganji;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import com.cntaige.collect.entity.Enterprise;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.cntaige.collect.Main;
import com.cntaige.collect.utils.JsoupHtml;
import com.cntaige.collect.utils.UtilTools;

/**
 * Console-driven collector for Ganji company listings.
 *
 * <p>{@link #start()} asks the user for a city and a category, walks the
 * paginated listing of that category and hands every company link it finds to
 * {@link GanjiDB}. Company detail pages are parsed by
 * {@link #getCompanyInfo(String)}.
 *
 * <p>Instances keep mutable crawl state (selected city, last visited page,
 * loaded categories) in plain fields and are not written for concurrent use.
 */
public class GanjiCollect {
    /** Base address of the city selected by the user (taken from the city link's href). */
    private String cityUrl;
    /** Ganji home page address. The name (including its spelling) is kept because it is public. */
    public static String GAN_JI_HONE_URL = "http://www.ganji.com";
    /** Most recently downloaded page. */
    private JsoupHtml jsoupHtml;
    /** Highest listing page number visited so far; 0 until a pager link has been followed. */
    private int prePageNum;
    /** Loaded categories; every entry has the keys "title" and "url". */
    private List<HashMap<String, String>> list;
    private GanjiDB ganjiDB;

    public GanjiCollect() {
        list = new ArrayList<HashMap<String, String>>();
        ganjiDB = GanjiDB.getInstance();
    }

    /**
     * Runs the whole interactive session: city selection, category selection,
     * link collection, then the detail collection and Excel export provided by
     * {@link GanjiDB}.
     */
    public void start() {
        System.out.println("正在获取赶集网城市列表...");
        getCategoryUrl(getCityUrl());
        getInputContent();
        ganjiDB.startCollectInfo(this);
        ganjiDB.saveExcel(this);
    }

    /**
     * Downloads the city index, prints it as a numbered menu and lets the user
     * pick one entry. The chosen city's address is stored in {@code cityUrl}.
     *
     * @return the display name of the chosen city
     * @throws IllegalStateException if no city could be loaded, because there
     *         would be nothing valid to choose and the prompt could never end
     */
    public String getCityUrl() {
        // Name and address of every city, in the order they are printed.
        List<HashMap<String, String>> cities = new ArrayList<HashMap<String, String>>();
        try {
            JsoupHtml cityPage = new JsoupHtml(
                    "http://www.ganji.com/index.htm");
            for (Element dlElement : cityPage.select(".all-city dl")) {
                Elements dtElements = dlElement.select("dt");
                Elements ddElements = dlElement.select("dd");
                // Pair each heading with its link list; stop at the shorter
                // of the two so unbalanced markup cannot index out of range.
                int groupCount = Math.min(dtElements.size(), ddElements.size());
                for (int i = 0; i < groupCount; i++) {
                    System.out.println("--------" + dtElements.get(i).text()
                            + "--------");
                    for (Element aHrefElement : ddElements.get(i).select("a")) {
                        String name = aHrefElement.text().trim();
                        HashMap<String, String> city = new HashMap<String, String>();
                        city.put("cityName", name);
                        city.put("cityUrl", aHrefElement.attr("href").trim());
                        cities.add(city);
                        // The menu number is the position the entry was just
                        // added at (indexOf would report the first duplicate).
                        System.out.print(" " + (cities.size() - 1) + "、"
                                + name + ".");
                    }
                    System.out.println("\r\n");
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (cities.isEmpty()) {
            throw new IllegalStateException(
                    "No cities could be loaded from the Ganji city index");
        }

        System.out.println("请输入要采集的城市编号：");
        int num = readMenuIndex(cities.size(), "无此编号的城市,请重新输入：",
                "请输入正确的城市编号：");
        cityUrl = cities.get(num).get("cityUrl");
        String cityName = cities.get(num).get("cityName");
        System.out.println("您要采集编号为  " + num + " " + cityName + "城市的信息。");
        return cityName;
    }

    /**
     * Reads menu numbers from the shared console scanner until one lies in
     * {@code [0, optionCount)}.
     *
     * <p>A token that is not an integer is discarded before prompting again;
     * without that the same token would be re-read forever. At end of input
     * the scanner's {@code NoSuchElementException} propagates to the caller.
     * Assumes {@code Main.getScanner()} returns a {@code java.util.Scanner}.
     *
     * @param optionCount       number of selectable entries, must be positive
     * @param outOfRangeMessage printed when the number is outside the menu
     * @param invalidMessage    printed when the input is not a number
     * @return the chosen index
     */
    private int readMenuIndex(int optionCount, String outOfRangeMessage,
            String invalidMessage) {
        while (true) {
            if (!Main.getScanner().hasNextInt()) {
                Main.getScanner().next(); // drop the unparsable token
                System.out.println(invalidMessage);
                continue;
            }
            int index = Main.getScanner().nextInt();
            if (index < 0 || index >= optionCount) {
                System.out.println(outOfRangeMessage);
                continue;
            }
            return index;
        }
    }

    /**
     * Downloads a company page and stores the company described on it.
     * A download failure is reported on stderr and otherwise ignored.
     *
     * @param url address of the company page
     */
    public void getCompanyInfo(String url) {
        try {
            jsoupHtml = new JsoupHtml(url);
            // Pages with a non-empty ".c-title" use the regular layout,
            // everything else is parsed with the VIP layout.
            if (!jsoupHtml.getText(".c-title").equals("")) {
                getNormalCompanyInfo(jsoupHtml);
            } else {
                getVipCompanyInfo(jsoupHtml);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses a VIP-layout company page. Every ".content" element that contains
     * "#company_description" produces one stored record.
     *
     * @param jsoupHtml the downloaded company page
     */
    private void getVipCompanyInfo(JsoupHtml jsoupHtml) {
        for (Element element : jsoupHtml.getElementsClass("content")) {
            if (element.getElementById("company_description") == null) {
                continue;
            }
            Elements items = element.select("li");
            items.select("span").remove();

            // One record per matching section, so an earlier record is never
            // modified after it has been handed to the database layer.
            Enterprise enterprise = new Enterprise();
            enterprise.setCompanyName(valueAfterColon(items, 0));
            enterprise.setCategory(valueAfterColon(items, 2));

            Elements paragraphs = element.select("p");
            if (!paragraphs.isEmpty()) {
                // The first paragraph is the company introduction.
                enterprise.setCompanyIntr(paragraphs.get(0).text());
            }
            // NOTE(review): the second paragraph appears to carry the address
            // ("label：value"), but no matching setter on Enterprise is visible
            // from this file, so it is not stored - confirm and add if needed.
            ganjiDB.insertCompanyInfo(enterprise);
        }
    }

    /**
     * Returns the part of a "label：value" list item that follows the first
     * full-width colon.
     *
     * @return the value, or "" when the item or the colon is missing
     */
    private static String valueAfterColon(Elements items, int index) {
        if (index >= items.size()) {
            return "";
        }
        String[] parts = items.get(index).text().trim().split("：");
        return parts.length > 1 ? parts[1] : "";
    }

    /**
     * Parses a regular-layout company page. The ".c-introduce" sections headed
     * "基本信息" and "注册信息" are merged into a single record, which is stored
     * once after all sections have been read.
     *
     * @param jsoupHtml the downloaded company page
     */
    private void getNormalCompanyInfo(JsoupHtml jsoupHtml) {
        Enterprise enterprise = new Enterprise();
        for (Element element : jsoupHtml.getElementsClass("c-introduce")) {
            Elements items = element.getElementsByTag("li");
            items.select("em").remove();
            String heading = element.getElementsByTag("H6").text();
            if (heading.equals("基本信息")) {
                enterprise.setCompanyName(getItemInfo(items, 0));
                if (enterprise.getCompanyName().equals("")) {
                    // No name: skip the rest of this section only.
                    // NOTE(review): the record is still stored below.
                    continue;
                }
                enterprise.setCompanyScale(getItemInfo(items, 1));
                enterprise.setCategory(getItemInfo(items, 2));
                enterprise.setCompanyType(getItemInfo(items, 3));
                enterprise.setCompanyIntr(element.select("#company_description").text());
            } else if (heading.equals("注册信息")) {
                enterprise.setSocialUniformCode(getItemInfo(items, 0));
                enterprise.setEstablishDate(getItemInfo(items, 1));
                enterprise.setOrganizationCode(getItemInfo(items, 2));
                enterprise.setOperatePeriod(getItemInfo(items, 3));
                enterprise.setRegisterAuthority(getItemInfo(items, 4));
                enterprise.setOperateStatus(getItemInfo(items, 5));
                enterprise.setRegisterAddress(getItemInfo(items, 6));
                enterprise.setRegisterCapital(getItemInfo(items, 7));
                enterprise.setEnterpriseType(getItemInfo(items, 8));
                enterprise.setBusinessScope(getItemInfo(items, 9));
            }
        }
        ganjiDB.insertCompanyInfo(enterprise);
    }

    /**
     * Returns the text of the first ".item-info" element inside the list item
     * at {@code index}.
     *
     * @return the text, or "" when the list is too short or the item has no
     *         ".item-info" child
     */
    private String getItemInfo(Elements elements, int index) {
        if (elements.size() <= index) {
            return "";
        }
        Elements info = elements.get(index).getElementsByClass("item-info");
        if (info.isEmpty()) {
            return "";
        }
        return info.get(0).text();
    }

    /**
     * Collects company links from one listing page and recursively follows
     * pager links that point to a page number higher than any visited so far.
     *
     * @param url address of the listing page
     */
    private void getCompanyUrl(String url) {
        System.out.println("--------------------------");
        System.out.println(UtilTools.getNowTime());
        System.out.println("开始采集公司链接");
        if (prePageNum == 0) {
            System.out.println("采集第1页");
        } else {
            System.out.println("采集第" + prePageNum + "页");
        }
        try {
            jsoupHtml = new JsoupHtml(url);
            for (Element element : jsoupHtml.getElementsTag("a")) {
                String href = element.attr("href");
                if (href.matches(".*/gongsi.*")) {
                    // Store the company link without its query string.
                    int queryStart = href.lastIndexOf("?");
                    String companyUrl = queryStart == -1
                            ? href
                            : href.substring(0, queryStart);
                    ganjiDB.insertCompanyLinks(companyUrl,
                            element.attr("title"));
                }
                int pageNum = parsePageNumber(href);
                if (pageNum > prePageNum) {
                    prePageNum = pageNum;
                    getCompanyUrl(cityUrl + href);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the page number from a pager link ending in "/o&lt;digits&gt;/".
     *
     * @return the page number, or -1 when {@code href} is not a pager link or
     *         the number does not fit an {@code int}
     */
    private static int parsePageNumber(String href) {
        if (!href.matches(".*/o[0-9]+/")) {
            return -1;
        }
        // The match guarantees that the last "/o" starts the trailing
        // "/o<digits>/" segment, even if "/o" also occurs earlier in the path.
        int marker = href.lastIndexOf("/o");
        String digits = href.substring(marker + 2, href.length() - 1);
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Downloads the phone-number image at {@code url} into the "cache/" folder
     * below the collect path. The file name is the part of the URL after its
     * first "=", with ".png" appended.
     *
     * @return whatever {@code UtilTools.getNetImage} returns, or {@code null}
     *         when the download fails
     */
    public String getImage(String url) {
        try {
            String path = Main.getCollectPath() + "cache/"
                    + url.substring(url.indexOf("=") + 1) + ".png";
            return UtilTools.getNetImage(url, path);
        } catch (IOException ignored) {
            // Best effort: a failed download is reported to the caller as null.
        }
        return null;
    }

    /** Prints the loaded categories as a numbered table, three per row. */
    private void showCategory() {
        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder
                .append("分类编号：\n-----------------------------------------------------------------\n");
        for (int i = 0; i < list.size(); i++) {
            stringBuilder
                    .append(i + "：" + list.get(i).get("title") + "   |   ");
            if ((i + 1) % 3 == 0) {
                stringBuilder
                        .append("\n-----------------------------------------------------------------\n");
            }
        }
        stringBuilder
                .append("\n-----------------------------------------------------------------");
        System.out.println(stringBuilder.toString());
    }

    /**
     * Asks for a category number, collects that category and then closes the
     * shared console scanner.
     *
     * @throws IllegalStateException if no category was loaded, because the
     *         prompt could never be answered
     */
    private void getInputContent() {
        if (list.isEmpty()) {
            throw new IllegalStateException(
                    "No categories were loaded; nothing to collect");
        }
        System.out.println("请输入要采集的分类编号：");
        while (true) {
            int index = readMenuIndex(list.size(), "请输入正确的编号：",
                    "请输入正确的编号：");
            try {
                collectNumIndex(index);
                break;
            } catch (RuntimeException e) {
                // Kept from the original flow: a failure while collecting is
                // reported and the user is asked for a number again.
                e.printStackTrace();
                System.out.println("请输入正确的编号：");
            }
        }
        Main.getScanner().close();
    }

    /**
     * Collects the company links of the category at {@code index}.
     *
     * @param index position in the category list
     */
    private void collectNumIndex(int index) {
        String url = list.get(index).get("url");
        System.out.println("您要采集的分类：" + list.get(index).get("title"));
        System.out.println("分类链接：" + url);
        getCompanyUrl(url);
    }

    /**
     * Loads the category list, appends it to {@code list} and prints it.
     * Each category address is the selected {@code cityUrl} followed by the
     * href of the category link.
     *
     * @param city display name of the selected city, used for the console
     *             message only
     */
    private void getCategoryUrl(String city) {
        System.out.println("正在获取'" + city + "'城市的分类目录...");
        try {
            // NOTE(review): the category page is always fetched from the "nn"
            // subdomain regardless of the selected city - confirm this is
            // intended.
            jsoupHtml = new JsoupHtml("http://nn.ganji.com/zhaopin/");
            for (Element element : jsoupHtml.getElementsTag("dt")) {
                String title = element.select("a").text().trim();
                if (title.equals("")) {
                    continue;
                }
                HashMap<String, String> category = new HashMap<>();
                category.put("title", title);
                category.put("url", cityUrl + element.select("a").attr("href"));
                list.add(category);
            }
            showCategory();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
